from pipeline example described by Pierre-Luc Germain, course sta426 UZH

Loading necessary libraries

suppressPackageStartupMessages({
  library(SingleCellExperiment)
  library(scran)
  library(scater)
  library(batchelor)
  library(scDblFinder)
  library(sctransform)
  library(muscat)
  library(SEtools)
  library(cowplot)
  library(BiocParallel)
  library(ComplexHeatmap)
  
  library(DropletUtils) # for read10xCounts
  library(Matrix) # to handle sparse matrix
  library(BiocNeighbors) # for kNN graph
  library(igraph) # for cluster_louvain
})
## Warning: replacing previous import 'ComplexHeatmap::pheatmap' by
## 'pheatmap::pheatmap' when loading 'SEtools'

Preprocessing & Clustering

Loading the data

## data can be downloaded from : http://imlspenticton.uzh.ch/dump/files_for_levesque.tar

# set local path
local.path <- getwd()
setwd(local.path)

#define data we want to look at
sample_names <- list("patient1_HS", "patient1_SCC", "patient2_HS", "patient2_AK")
patient1_HS.path <- file.path("data", "patient1_HS")
patient1_SCC.path <- file.path("data", "patient1_SCC")
patient2_HS.path <- file.path("data", "patient2_HS")
patient2_AK.path <- file.path("data", "patient2_AK")

# read in filtered data and create list of SingleCellExperiment objects
paths <- list(patient1_HS.path, patient1_SCC.path, patient2_HS.path, patient2_AK.path)
sces <- lapply(paths, function(i) read10xCounts(file.path(i, "outs/filtered_feature_bc_matrix"), col.names = TRUE))
split_data <- function(sceo) {
  # Split the data, store ADT in alternative experiment
  sceo <- splitAltExps(sceo, rowData(sceo)$Type)
  
  # Coerce sparse matrix for ADT into a dense matrix
  counts(altExp(sceo)) <- as.matrix(counts(altExp(sceo)))
  sceo
}

sces <- lapply(sces, split_data)

# only select the gene expression data, drop the antibody capture
sce.patient1_HS <- sces[[1]]
sce.patient1_SCC <- sces[[2]]
sce.patient2_HS <- sces[[3]]
sce.patient2_AK <- sces[[4]]

# add name to sce for output graph
attr(sce.patient1_HS, "name") <- "Patient1 HS"
attr(sce.patient1_SCC, "name") <- "Patient1 SCC"
attr(sce.patient2_HS, "name") <- "Patient2 HS"
attr(sce.patient2_AK, "name") <- "Patient2 AK"

Normalization & reduction

# get rid of seldom detected genes
remove_rare_genes <- function(sceo, num_genes) {
  sceo <- sceo[(rowSums(counts(sceo) > 0) > num_genes),]
}

variance_stabilization <- function(sceo, num_genes) {

  sceo <- remove_rare_genes(sceo, 4)
  vsto <- suppressWarnings(sctransform::vst(counts(sceo)))
  logcounts(sceo, withDimnames=FALSE) <- vsto$y
  
  # Check that new assay was added to sce
  #assays(sce.patient1_HS)
  
  # get highly-variable genes
  hvgo <- row.names(sceo)[order(vsto$gene_attr$residual_variance, decreasing=TRUE)[1:num_genes]]
  results <- list("sceo" <- sceo, "hvgo" <- hvgo)
  return(results)
}

results <- variance_stabilization(sce.patient1_HS, 2000)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 21242 by 4409
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 4409 cells
## Found 116 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 21242 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 58.8078 secs
sce.patient1_HS <- results[[1]]
hvg.patient1_HS <- results[[2]]

results <- variance_stabilization(sce.patient1_SCC, 2000)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 20392 by 3073
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 3073 cells
## Found 87 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 20392 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 37.9194 secs
sce.patient1_SCC <- results[[1]]
hvg.patient1_SCC <- results[[2]]

results <- variance_stabilization(sce.patient2_HS, 2000)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 20909 by 8244
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 8244 cells
## Found 167 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 20909 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 1.759786 mins
sce.patient2_HS <- results[[1]]
hvg.patient2_HS <- results[[2]]

results <- variance_stabilization(sce.patient2_AK, 2000)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 20270 by 5036
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5036 cells
## Found 119 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 20270 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 59.55612 secs
sce.patient2_AK <- results[[1]]
hvg.patient2_AK <- results[[2]]

ADD A NICE TABLE WITH ALL THE MARKER USED (and their source) AND EXPLAINED WHICH WEREN’T FOUND also explain that we decided to add the known markers from the litterature into the group of genes used to produce a low-dimensional representation of the data even when they were not part of the most variable genes

# run PCA

# using known genes
# genes not found in the data : "DCDN" "WISP2" "SEPP1" "TYP1" 
# genes linked to pericytevSMC : "ACTA2","TAGLN"
# were removed from the following list
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","LUM","MMP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","CDH19","NGFR","UBE2C","PCNA")


sc_PCA <- function(sceo, hvgo, known_markers) {
  # only gives index of the first encountered
  known_genes <- rownames(sceo)[which(rowData(sceo)$Symbol %in% known_markers)]
  
  # Check that the genes we know are also part of the highly variable genes
  idx_notfound <- which(!(known_genes %in% hvgo))
  
  # print the markers that were not found in the data
  #known_markers[idx_notfound]
  
  # if some of the known markers were not selected, add them
  if (length(idx_notfound) > 0) {
    cat("Adding", known_markers[idx_notfound], "to the gene set used for PCA in", attr(sceo, "name"), "\n")
    hvgo <- c(hvgo, known_genes[idx_notfound])
  }
    
  # using highly variable genes
  sceo <- runPCA(sceo, subset_row=hvgo)
  
  # check the variance explained by the PCs:
  pc.var <- attr(reducedDim(sceo),"percentVar")
  plot(pc.var, xlab="PCs", ylab="% variance explained", main=paste("Variance explained across PCs for", attr(sceo, "name")))
  
  # restrict to the first 20 components:
  reducedDim(sceo) <- reducedDim(sceo)[,1:20]
  
  # run TSNE and UMAP based on the PCA:
  sceo <- runTSNE(sceo, dimred="PCA")
  sceo <- runUMAP(sceo, dimred="PCA")
}

sce.patient1_HS <- sc_PCA(sce.patient1_HS, hvg.patient1_HS, known_markers)
## Adding IL7R to the gene set used for PCA in Patient1 HS

sce.patient1_SCC <- sc_PCA(sce.patient1_SCC, hvg.patient1_SCC, known_markers)
## Adding KRT10 CXCL2 TNN SFRP1 AIF1 CD3E NGFR to the gene set used for PCA in Patient1 SCC

sce.patient2_HS <- sc_PCA(sce.patient2_HS, hvg.patient2_HS, known_markers)
## Adding WIF1 AIF1 CD207 ITGAX CD3E to the gene set used for PCA in Patient2 HS

sce.patient2_AK <- sc_PCA(sce.patient2_AK, hvg.patient2_AK, known_markers)
## Adding SFRP1 FCER1G ITGAX CD3E to the gene set used for PCA in Patient2 AK

Clustering

sc_cluster <- function(sceo, k=30) {
  go <- buildKNNGraph(sceo, BNPARAM=AnnoyParam(), use.dimred="PCA", k=k)

  sceo$cluster <- as.factor(cluster_louvain(go)$membership)

  #plots <- plot_grid( plotTSNE(sceo, colour_by="cluster", text_by="cluster") + ggtitle(attr(sceo, "name")), plotUMAP(sceo, colour_by="cluster", text_by="cluster") + ggtitle(attr(sceo, "name")) ) 
  
  plots <- plot_grid( 
    plotTSNE(sceo, colour_by="cluster", text_by="cluster"), 
    plotUMAP(sceo, colour_by="cluster", text_by="cluster"))
  title <- ggdraw() + draw_label(attr(sceo, "name"), fontface='bold')
  plots <- plot_grid(title, plots, ncol=1, rel_heights=c(0.1, 1))
  print(plots)
  
  return(list(sceo, go))
}

results <- sc_cluster(sce.patient1_HS)

sce.patient1_HS <- results[[1]]
g.patient1_HS <- results[[2]]

results <- sc_cluster(sce.patient1_SCC)

sce.patient1_SCC <- results[[1]]
g.patient1_SCC <- results[[2]]

results <- sc_cluster(sce.patient2_HS)

sce.patient2_HS <- results[[1]]
g.patient2_HS <- results[[2]]

results <- sc_cluster(sce.patient2_AK)

sce.patient2_AK <- results[[1]]
g.patient2_AK <- results[[2]]

Cluster annotation

Known markers

# genes not found in our data : "DCDN" "WISP2" "SEPP1" "TYP1" 
# were removed from the list below
genes <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
  # KERATINOCYTE
  keratinocyte = c("DSC3","DSP","LGALS7"),
  #keratinocyte_basal = c("KRT5","KRT14","COL17A1"),
  #keratinocyte_suprabasal = c("KRT1","KRT10"),
  #keratinocyte_differentiated = c("LOR", "SPINK5"),
  #keratinocyte_ORS = c("KRT6B"),
  #keratinocyte_channel = c("GJB2","GJB6","ATP1B1"),
  #keratinocyte_sebaceous_gland = c("MGST1","FASN"),
  #keratinocyte_sweat_gland  = c("DCD","AQP5"),
  # FIBROBLAST
  fibroblast  = c("LUM","MMP2"),
  # fibroblast subclassification  from: https://www.nature.com/articles/s42003-020-0922-4#
  #fibroblast_secretory_reticular = c(SLPI","CTHRC1","MFAP5","TSPAN8"),
  #fibroblast_proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
  #fibroblast_secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
  #fibroblast_mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1"),
  # PERICYTE & vSMC
  #pericytevSMC  = c("ACTA2","TAGLN"), #vSMC : vascular smooth muscel cell
  #pericytevSMC_pericyte  = c("RGS5"),
  #pericytevSMC_vSMC  = c("MYL9","TPM2","RERGL"),
  # ENDOTHELIAL CELL
  endothelial  = c("PECAM1","SELE","CLDN5","VWF"),
  #endothelial_lymphatic  = c("PROX1","LYVE1"),
  # MYELOID CELL
  myeloid  = c("HLA-DRA","FCER1G","TYROBP","AIF1"),
  #myeloid_dendritic  = c("CD1C"),
  #myeloid_langerhans  = c("CD207"),
  #myeloid_macrophage = c("CD68","RNASE1","ITGAX"),
  # LYMPHOCYTE
  lymphocyte  = c("CD3D","CD3E","CD52","IL7R"),
  # MELANOCYTE
  melanocyte  = c("DCT","MLANA","PMEL"),
  # SCHWANN CELL
  schwann  = c("CDH19","NGFR"),
  # MITOTIC CELL
  mitotic  = c("UBE2C","PCNA")
)

convert_rownames <- function(sceo, go, genes) {
  return(paste(rownames(sceo), rowData(sceo)$Symbol, sep = "."))
}

annotate_cells <- function(sceo, go, genes) {
  kmo <- lapply(genes, FUN=function(go) grep(paste0(go, "$", collapse="|"), rownames(sceo), value=TRUE))
  
  # TODO: try to modify this so that we don't have to convert the rownames anymore and use
  # the marker names save in rowData(sceo)$Symbol directly
  #kmo <- lapply(genes, FUN=function(go) grep(paste0(go, "$", collapse="|"), rowData(sceo)$Symbol, value=TRUE))
  #print(kmo)
  
  return(list(sceo, kmo))
}


rownames(sce.patient1_HS) <- convert_rownames(sce.patient1_HS, g.patient1_HS, genes)
results <- annotate_cells(sce.patient1_HS, g.patient1_HS, genes)
sce.patient1_HS <- results[[1]]
km.patient1_HS <- results[[2]]

rownames(sce.patient1_SCC) <- convert_rownames(sce.patient1_SCC, g.patient1_SCC, genes)
results <- annotate_cells(sce.patient1_SCC, g.patient1_SCC, genes)
sce.patient1_SCC <- results[[1]]
km.patient1_SCC <- results[[2]]

rownames(sce.patient2_HS) <- convert_rownames(sce.patient2_HS, g.patient2_HS, genes)
results <- annotate_cells(sce.patient2_HS, g.patient2_HS, genes)
sce.patient2_HS <- results[[1]]
km.patient2_HS <- results[[2]]

rownames(sce.patient2_AK) <- convert_rownames(sce.patient2_AK, g.patient2_AK, genes)
results <- annotate_cells(sce.patient2_AK, g.patient2_AK, genes)
sce.patient2_AK <- results[[1]]
km.patient2_AK <- results[[2]]

Pseudo-bulk aggregation

# mean logcounts by cluster
pseudobulk <- function(sceo, kmo) {
  pbo <- aggregateData(sceo, "logcounts", by=c("cluster"), fun="mean")

  # TODO : find a way to delete the left annotation which is unreadable
  
  # build a heatmap of the mean logcounts of the known markers:
  h <- pheatmap(assay(pbo)[unlist(kmo),], annotation_row=data.frame(row.names=unlist(kmo), type=rep(names(kmo), lengths(kmo))), split=rep(names(kmo), lengths(kmo)), annotation_names_row=F, cluster_rows=FALSE, scale="row", main=paste(attr(sceo, "name"),"before markers aggregation"), fontsize_row=6, fontsize_col=10, angle_col = "45")
  print(h)
  
  #--- aggregation markers
  # we will assign clusters to the cell type whose markers are the most expressed
  
  # we extract the pseudo-bulk counts of the markers for each cluster
  mato <- assay(pbo)[unlist(kmo),]
  
  # we aggregate across markers of the same type
  mato <- aggregate(t(scale(t(mato))), by=list(type=rep(names(kmo), lengths(kmo))), FUN=sum)
  
  # for each column (cluster), we select the row (cell type) which has the maximum aggregated value
  cl2o<- mato[,1][apply(mato[,-1], 2, FUN=which.max)]
  # we convert the cells' cluster labels to cell type labels:
  sceo$cluster2 <- cl2o[sceo$cluster]
  
  # we aggregate again to pseudo-bulk using the new clusters
  pbo <- aggregateData(sceo, "logcounts", by=c("cluster2"), fun="mean")
  
  # we plot again the expression of the markers as a sanity check
  
  # If we want to hide the gene markers show_rownames = FALSE
  h1 <- pheatmap(assay(pbo)[unlist(kmo),], annotation_row=data.frame(row.names=unlist(kmo), type=rep(names(kmo), lengths(kmo))), split=rep(names(kmo), lengths(kmo)), annotation_names_row=F, cluster_rows=FALSE, scale="row", main=paste(attr(sceo, "name"),"after markers aggregation"), fontsize_row=6, fontsize_col=10, angle_col = "45")
  print(h1)
  
  # UMAP plot
  p <- plotUMAP(sceo, colour_by="cluster2", text_by="cluster2", point_size=1) + ggtitle(attr(sceo, "name")) + theme(legend.title=element_blank())
  print(p)
  
  return(list(sceo, pbo, mato, cl2o))
}

#Patient 1 HS
#km.patient1_HS
#km.patient1_SCC
#rowData(sce.patient1_HS[c("keratinocyte", "fibroblast", "endothelial", "myeloid", )])$Symbol

results <- pseudobulk(sce.patient1_HS, km.patient1_HS)

sce.patient1_HS <- results[[1]]
pb.patient1_HS <- results[[2]]
mat.patient1_HS <- results[[3]]
cl2.patient1_HS <- results[[4]]

#Patient 1 SCC
results <- pseudobulk(sce.patient1_SCC, km.patient1_SCC)

sce.patient1_SCC <- results[[1]]
pb.patient1_SCC <- results[[2]]
mat.patient1_SCC <- results[[3]]
cl2.patient1_SCC <- results[[4]]

#Patient 2 HS
results <- pseudobulk(sce.patient2_HS, km.patient2_HS)

sce.patient2_HS <- results[[1]]
pb.patient2_HS <- results[[2]]
mat.patient2_HS <- results[[3]]
cl2.patient2_HS <- results[[4]]

#Patient 2 AK
results <- pseudobulk(sce.patient2_AK, km.patient2_AK)

sce.patient2_AK <- results[[1]]
pb.patient2_AK <- results[[2]]
mat.patient2_AK <- results[[3]]
cl2.patient2_AK <- results[[4]]

Sub population

cluster_subtype <- function(sceo, cell_type, known_markers, subtype_markers) {
  # select the cell labeled as keratinocytes in the previous step
  sceo.sub <- sceo[,sceo$cluster2==cell_type]
  
  sceo.sub <- remove_rare_genes(sceo.sub, 4)
  results <- variance_stabilization(sceo.sub, 2000)
  
  sceo.sub <- results[[1]]
  hvgo.sub <- results[[2]]
  
  #---- run the pipeline again on that subdataset
  sceo.sub <- sc_PCA(sceo.sub, hvgo.sub, known_markers)
  
  #--- clustering
  
  results <- sc_cluster(sceo.sub)
  sceo.sub <- results[[1]]
  go.sub <- results[[2]]
  
  results <- annotate_cells(sceo.sub, go.sub, subtype_markers)
  
  sceo.sub <- results[[1]]
  kmo.sub <- results[[2]]
  
  #---
  
  results <- pseudobulk(sceo.sub, kmo.sub)
  
  sceo.sub <- results[[1]]
  pbo.sub <- results[[2]]
  mato.sub <- results[[3]]
  cl2o.sub <- results[[4]]
  
  return(sceo.sub)

}


known_markers.kera <- c("KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5")

#annotate the subclusters
  genes.kera <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
    # KERATINOCYTE
    #keratinocyte = c("DSC3","DSP","LGALS7"),
    basal = c("KRT5","KRT14","COL17A1"),
    suprabasal = c("KRT1","KRT10"),
    differentiated = c("LOR", "SPINK5"),
    ORS = c("KRT6B"),
    channel = c("GJB2","GJB6","ATP1B1"),
    sebaceous_gland = c("MGST1","FASN"),
    sweat_gland  = c("DCD","AQP5")
    )
  
  
# TODO : find a way to avoid giving both known_merkers.kera and genes.kera since both contain the 
  # same information in essence. Maybe will be fixed by the first todo
sce.patient1_HS.kera <- cluster_subtype(sce.patient1_HS, "keratinocyte", known_markers.kera, genes.kera)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 17231 by 1265
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1265 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 61 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 17231 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 15.94926 secs
## Adding KRT5 KRT10 AQP5 to the gene set used for PCA in Patient1 HS

sce.patient1_SCC.kera <- cluster_subtype(sce.patient1_SCC, "keratinocyte", known_markers.kera, genes.kera)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 16156 by 703
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 703 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## There are 3 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 87 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 16156 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 9.634956 secs
## Adding FASN to the gene set used for PCA in Patient1 SCC

sce.patient2_HS.kera <- cluster_subtype(sce.patient2_HS, "keratinocyte", known_markers.kera, genes.kera)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 16526 by 2058
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 2058 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 102 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 16526 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 23.92052 secs
## Adding FASN to the gene set used for PCA in Patient2 HS

sce.patient2_AK.kera <- cluster_subtype(sce.patient2_AK, "keratinocyte", known_markers.kera, genes.kera)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 16962 by 1153
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1153 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 79 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 16962 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 14.3774 secs

# need the previous block to be runned (at least until l. 298)

genes.dyn <- list(
  # classification: https://www.sciencedirect.com/science/article/pii/S0092867420306723
  keratinocyte_basal = "COL17A1",
  keratinocyte_cycling = "MKI67",
  keratinocyte_differentiating = "KRT1"
  )

dynamic_plot <- function(sceo, go, genes) {
    kmo <- lapply(genes, FUN=function(go) grep(paste0(go, "$", collapse="|"), rownames(sceo), value=TRUE))
  
  # mean logcounts by cluster:
  pbo <- aggregateData(sceo, "logcounts", by=c("cluster"), fun="mean")
  lengths(kmo)
  h <- pheatmap(assay(pbo)[unlist(kmo),], annotation_row=data.frame(row.names=unlist(kmo), type=rep(names(kmo), lengths(kmo))), split=rep(names(kmo), lengths(kmo)), cluster_rows=FALSE, scale="row", main="Before markers aggregation", fontsize_row=6, fontsize_col=10)
  h
  
  # we will assign clusters to the cell type whose markers are the most expressed
  # we extract the pseudo-bulk counts of the markers for each cluster
  mato <- assay(pbo)[unlist(kmo),]
  
  # we aggregate across markers of the same type
  mato <- aggregate(t(scale(t(mato))), by=list(type=rep(names(kmo), lengths(kmo))), FUN=sum)
  
  # for each column (cluster), we select the row (cell type) which has the maximum aggregated value
  cl3o <- mato[,1][apply(mato[,-1], 2, FUN=which.max)]
  
  # we convert the cells' cluster labels to cell type labels:
  sceo$cluster3 <- cl3o[sceo$cluster]
  
  print(sceo)
  
  # we aggregate again to pseudo-bulk using the new clusters
  pbo <- aggregateData(sceo, "logcounts", by=c("cluster3"), fun="mean")
  # we plot again the expression of the markers as a sanity check
  h1 <- pheatmap(assay(pbo)[unlist(kmo),], annotation_row=data.frame(row.names=unlist(kmo), type=rep(names(kmo), lengths(kmo))), split=rep(names(kmo), lengths(kmo)), cluster_rows=FALSE, scale="row", main="After markers aggregation", fontsize_row=6, fontsize_col=10) #, scale="row", main="After markers aggregation", fontsize_row=6, fontsize_col=10)
  h1
  plotUMAP(sceo, colour_by="cluster3", text_by="cluster3", point_size=1)
  
  
  #---- Histograms
  total <- ncol(sceo)
  basal <- ncol(sceo[,sceo$cluster3=="keratinocyte_basal"])
  cycling <- ncol(sceo[,sceo$cluster3=="keratinocyte_cycling"])
  diff <- ncol(sceo[,sceo$cluster3=="keratinocyte_differentiating"])
  counts <- c(basal, cycling, diff)
  percentage <- counts/total
  kera.dyn <- data.frame(names=c("Basal", "Cycling", "Differentiating"), percentage=percentage)
  kera.dyn
  
  p <- ggplot(data=kera.dyn, aes(x=names, y=percentage, fill=names)) + geom_bar(stat="identity") + labs(title=paste("Proportion of Keratinocyte states in", attr(sceo, "name")),x="Subtypes", y="Percentage") + ylim(0,1) + theme(legend.position='none')
  print(p)
  
}

dynamic_plot(sce.patient1_HS.kera, g.patient1_HS.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 17231 1265 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(17231): ENSG00000238009.AL627309.1 ENSG00000241860.AL627309.5
##   ... ENSG00000278817.AC007325.4 ENSG00000277196.AC007325.2
## rowData names(3): ID Symbol Type
## colnames(1265): AAACCTGCATAGAAAC-1 AAACCTGGTCTCATCC-1 ...
##   TTTGTCACATGGTCTA-1 TTTGTCAGTCCTAGCG-1
## colData names(5): Sample Barcode cluster cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture

dynamic_plot(sce.patient1_SCC.kera, g.patient1_SCC.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 16156 703 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(16156): ENSG00000241860.AL627309.5 ENSG00000237491.LINC01409
##   ... ENSG00000277666.AC136352.3 ENSG00000273554.AC136616.1
## rowData names(3): ID Symbol Type
## colnames(703): AAACCTGGTAGATTAG-1 AAACGGGGTCCGTGAC-1 ...
##   TTTGCGCGTCGGCATC-1 TTTGGTTAGTCATGCT-1
## colData names(5): Sample Barcode cluster cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture

dynamic_plot(sce.patient2_HS.kera, g.patient2_HS.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 16526 2058 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(16526): ENSG00000241860.AL627309.5 ENSG00000237491.LINC01409
##   ... ENSG00000278817.AC007325.4 ENSG00000277196.AC007325.2
## rowData names(3): ID Symbol Type
## colnames(2058): AAACCTGGTCAAACTC-1 AAACCTGGTTTAGCTG-1 ...
##   TTTGTCATCAGTCAGT-1 TTTGTCATCTCACATT-1
## colData names(5): Sample Barcode cluster cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture

dynamic_plot(sce.patient2_AK.kera, g.patient2_AK.kera, genes.dyn)
## class: SingleCellExperiment 
## dim: 16962 1153 
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(16962): ENSG00000241860.AL627309.5 ENSG00000237491.LINC01409
##   ... ENSG00000278817.AC007325.4 ENSG00000277196.AC007325.2
## rowData names(3): ID Symbol Type
## colnames(1153): AAACCTGAGAGTTGGC-1 AAACCTGTCCTTGACC-1 ...
##   TTTGGTTGTGGTAACG-1 TTTGGTTTCTACCTGC-1
## colData names(5): Sample Barcode cluster cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture

known_markers.fibro <- c("SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1")

genes.fibro <- list(
   #fibroblast  = c("LUM","MMP2"),
#   # fibroblast subclassification  from: https://www.nature.com/articles/s42003-020-0922-4#
   secretory_reticular = c("SLPI","CTHRC1","MFAP5","TSPAN8"),
   proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
   secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
   mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1")
)

sce.patient1_HS.fibro <- cluster_subtype(sce.patient1_HS, "fibroblast", known_markers.fibro, genes.fibro)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 13022 by 590
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 590 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 45 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 13022 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 8.380729 secs
## Adding ID1 WIF1 SFRP1 to the gene set used for PCA in Patient1 HS

sce.patient1_SCC.fibro <- cluster_subtype(sce.patient1_SCC, "fibroblast", known_markers.fibro, genes.fibro)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14691 by 759
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 759 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## There are 1 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 69 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14691 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 10.26022 secs
## Adding ID1 to the gene set used for PCA in Patient1 SCC

sce.patient2_HS.fibro <- cluster_subtype(sce.patient2_HS, "fibroblast", known_markers.fibro, genes.fibro)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14296 by 1156
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1156 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 94 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14296 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 15.11345 secs

sce.patient2_AK.fibro <- cluster_subtype(sce.patient2_AK, "fibroblast", known_markers.fibro, genes.fibro)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 12777 by 275
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 275 cells
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## Found 53 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 12777 genes
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 5.062623 secs
## Adding CCL19 to the gene set used for PCA in Patient2 AK

# The following code was used to check which genes were missing from in the data
# the known_markers are from the litterature

data_markers <- rowData(sce.patient1_HS)$Symbol
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","DCDN","LUM","MMP2","WISP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","ACTA2","TAGLN","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","SEPP1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","TYP1","CDH19","NGFR","UBE2C","PCNA")


idx_notfound <- which(!(known_markers %in% rowData(sce.patient1_HS)$Symbol))

print(paste("Done:", length(known_markers)-length(idx_notfound), "/", length(known_markers), "markers found"))
cat("Genes missing in dataset: ", known_markers[idx_notfound])